Version 2023.11.27_11.25.57 adds the per-patient analysis. Version 2024.02.16_15.52.13 adds a table for the appendix tables.

# Replace every underscore in `x` with a space (used to prettify column names
# before a table is rendered).
underscore_to_space <- function(x) {
  str_replace_all(x, pattern = "_", replacement = " ")
}

# Replace every underscore in `x` with a space, then convert the result to
# sentence case.
underscore_to_space_initial_cap <- function(x) {
  spaced <- str_replace_all(x, pattern = "_", replacement = " ")
  str_to_sentence(spaced)
}
# Load the druggable-outlier calls and add a coarse cohort label: any cohort
# whose name contains "Treehouse" is collapsed to "Treehouse"; every other
# cohort name is kept unchanged.
outliers <- mutate(
  read_tsv("../input_data/druggable_outliers_from_treehouse_and_other_cohorts_2023_11_09-13_46_32_2023.tsv"),
  high_level_cohort = ifelse(
    str_detect(comparison_cohort, "Treehouse"),
    "Treehouse",
    comparison_cohort
  )
)
## Rows: 287 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Sample_ID, comparison_cohort, gene, donor_ID
## lgl (1): pathway_support
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sanity check: number of distinct samples and donors in the outlier table.
outliers %>% pull(Sample_ID) %>% n_distinct()
## [1] 34
outliers %>% pull(donor_ID) %>% n_distinct()
## [1] 32

Define cohort codes

# One-letter code for each comparison cohort, used to abbreviate cohorts in
# the wide per-sample tables.
cohort_codes <- tribble(
  ~cohort_name,   ~cohort_code,
  "PEDAYA",       "P",
  "TCGA",         "T",
  "TH03_TH34",    "S",
  "Treehouse_pc", "C",
  "Treehouse_pd", "D"
)

Tile plot of all outliers

# One facet per sample; within a facet a tile marks each gene/cohort pair for
# which an outlier was called.
ggplot(
  data = outliers,
  mapping = aes(x = comparison_cohort, y = gene, fill = comparison_cohort)
) +
  geom_tile() +
  facet_wrap(vars(Sample_ID), nrow = 1) +
  theme(
    # alternative: axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
    axis.text.x = element_blank(),
    strip.text.x = element_text(angle = 90)
  ) +
  labs(x = "") +
  scale_fill_bright()

Heatmap showing the number of cohorts in which each outlier was detected

I can make this look better if we decide to use it, but it’s non-trivial

# For each sample/gene pair, count the rows of `outliers` (one row per
# comparison cohort in which the outlier was called).
outliers_heatmap_data <- summarize(
  group_by(outliers, Sample_ID, gene),
  n_outliers = n()
)
## `summarise()` has grouped output by 'Sample_ID'. You can override using the
## `.groups` argument.
# Sample-by-gene heatmap coloured by the number of cohorts calling the outlier.
ggplot(
  data = outliers_heatmap_data,
  mapping = aes(x = Sample_ID, y = gene, fill = n_outliers)
) +
  geom_tile(color = "black") +
  # theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

library(ggVennDiagram)
# Build one set of "<Sample_ID>_<gene>" identifiers per comparison cohort for
# the Venn diagrams.
raw_outliers_for_venn <- outliers %>%
  mutate(sample_gene = paste(Sample_ID, gene, sep = "_")) %>%
  arrange(comparison_cohort) %>%
  select(sample_gene, comparison_cohort) %>%
  group_split(comparison_cohort)

list_of_outliers_for_venn <- lapply(
  raw_outliers_for_venn,
  function(cohort_rows) cohort_rows %>% pull(sample_gene)
)

# Label each set with the cohort name stored in its own rows. Taking the names
# from a separately sorted vector is only correct while base `sort()` (session
# locale) and `group_split()` happen to order the cohort names identically;
# reading the name from the group itself cannot mislabel a set.
names(list_of_outliers_for_venn) <- vapply(
  raw_outliers_for_venn,
  function(cohort_rows) cohort_rows$comparison_cohort[[1]],
  character(1)
)

# Venn diagram of outlier calls per cohort, with intersection contents shown.
ggVennDiagram(x = list_of_outliers_for_venn, show_intersect = TRUE)
## Warning in geom_text(aes_string(label = "count", text = "text"), x =
## label_coord[, : Ignoring unknown aesthetics: text
# Same Venn diagram, filled with the "Reds" palette (darker = larger count).
ggVennDiagram(x = list_of_outliers_for_venn) +
  scale_fill_distiller(direction = 1, palette = "Reds")

export list of genes found only by TCGA

# outliers %>%
#   group_by(Sample_ID, gene) %>%
#   filter(length(comparison_cohort) == 1,
#          "TCGA" %in% comparison_cohort) %>%
#   ungroup %>%
#   select(gene) %>% write_tsv("../gather_input_data/genes found only by TCGA in at least one sample.txt")

Annotate with combined full low level cohort names

# Collapse a vector into a single comma-and-space separated string
# (used as `values_fn` when several cohorts land in one pivoted cell).
collapse_fun <- function(x) {
  toString(x)
}

# One row per gene, one column per sample; each cell lists the comparison
# cohorts (comma separated) in which that sample/gene outlier was called.
all_outliers_combined_wide <- outliers %>%
  select(-c(pathway_support, donor_ID, high_level_cohort)) %>%
  pivot_wider(
    names_from = Sample_ID,
    values_from = comparison_cohort,
    values_fn = collapse_fun
  )

# Sanity check: number of distinct samples and genes in the outlier table.
outliers %>% pull(Sample_ID) %>% n_distinct()
## [1] 34
outliers %>% pull(gene) %>% n_distinct()
## [1] 56
# Back to long form: one row per sample/gene pair with its combined cohort
# string; pairs with no outlier call (NA cells) are dropped.
all_outliers_combined_long <- na.omit(
  pivot_longer(
    all_outliers_combined_wide,
    cols = -gene,
    names_to = "Sample_ID",
    values_to = "comparison_cohorts"
  )
)

How many outliers are present in each combination of cohorts?

# Frequency of each cohort combination, most common first, with a totals row.
all_outliers_combined_long %>%
  tabyl(comparison_cohorts) %>%
  arrange(desc(n)) %>%
  adorn_pct_formatting() %>%
  adorn_totals() %>%
  kbl() %>%
  kable_styling(full_width = FALSE)
comparison_cohorts n percent
TCGA, Treehouse_pc 27 20.8%
TCGA 21 16.2%
Treehouse_pd 13 10.0%
TCGA, TH03_TH34, Treehouse_pc 12 9.2%
TH03_TH34 11 8.5%
PEDAYA, TCGA, TH03_TH34, Treehouse_pc 9 6.9%
TCGA, TH03_TH34, Treehouse_pc, Treehouse_pd 8 6.2%
PEDAYA, TCGA, TH03_TH34, Treehouse_pc, Treehouse_pd 7 5.4%
PEDAYA 5 3.8%
TCGA, Treehouse_pc, Treehouse_pd 4 3.1%
TCGA, TH03_TH34 3 2.3%
TCGA, Treehouse_pd 3 2.3%
PEDAYA, TCGA, Treehouse_pc, Treehouse_pd 2 1.5%
PEDAYA, TCGA, TH03_TH34 1 0.8%
PEDAYA, TCGA, Treehouse_pc 1 0.8%
PEDAYA, Treehouse_pc 1 0.8%
TH03_TH34, Treehouse_pc 1 0.8%
TH03_TH34, Treehouse_pd 1 0.8%
Total 130

Tile plot of combination of outliers

# Sample-by-gene tiles coloured by the combination of cohorts calling the outlier.
ggplot(
  data = all_outliers_combined_long,
  mapping = aes(x = Sample_ID, y = gene, fill = comparison_cohorts)
) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Sanity check: no sample was lost in the wide -> long round trip.
all_outliers_combined_long %>% pull(Sample_ID) %>% n_distinct()
## [1] 34

Annotate with combined full high level cohort names

# Same wide table as above but using the high-level cohort label; `distinct()`
# removes the duplicate rows created when several Treehouse cohorts collapse
# to the single "Treehouse" label.
high_level_all_outliers_combined_wide <- outliers %>%
  select(-c(pathway_support, donor_ID, comparison_cohort)) %>%
  distinct() %>%
  pivot_wider(
    names_from = Sample_ID,
    values_from = high_level_cohort,
    values_fn = collapse_fun
  )

# Sanity check: number of distinct samples and genes in the outlier table.
outliers %>% pull(Sample_ID) %>% n_distinct()
## [1] 34
outliers %>% pull(gene) %>% n_distinct()
## [1] 56
# Long form of the high-level table: one row per sample/gene pair with its
# combined high-level cohort string; NA cells (no call) are dropped.
high_level_all_outliers_combined_long <- na.omit(
  pivot_longer(
    high_level_all_outliers_combined_wide,
    cols = -gene,
    names_to = "Sample_ID",
    values_to = "comparison_cohorts"
  )
)

How many outliers are present in each high level combination of cohorts?

# Frequency of each high-level cohort combination, most common first, with a
# totals row.
high_level_all_outliers_combined_long %>%
  tabyl(comparison_cohorts) %>%
  arrange(desc(n)) %>%
  adorn_pct_formatting() %>%
  adorn_totals() %>%
  kbl() %>%
  kable_styling(full_width = FALSE)
comparison_cohorts n percent
TCGA, Treehouse 34 26.2%
TCGA 21 16.2%
TCGA, TH03_TH34, Treehouse 20 15.4%
PEDAYA, TCGA, TH03_TH34, Treehouse 16 12.3%
Treehouse 13 10.0%
TH03_TH34 11 8.5%
PEDAYA 5 3.8%
PEDAYA, TCGA, Treehouse 3 2.3%
TCGA, TH03_TH34 3 2.3%
TH03_TH34, Treehouse 2 1.5%
PEDAYA, TCGA, TH03_TH34 1 0.8%
PEDAYA, Treehouse 1 0.8%
Total 130

Annotate with minimal combined cohort abbreviations

# Concatenate a vector into a single string with no separator
# (used as `values_fn` for the compact cohort-code cells).
collapse_fun_no_coma <- function(x) {
  paste0(x, collapse = "")
}

# Wide gene-by-sample table of one-letter cohort codes; a code is followed by
# an asterisk when that call has pathway support. The asterisk is written with
# backslashes so the kbl table does not interpret it as italics markup.
all_outliers_min_abbrev_combined_wide <- outliers %>%
  left_join(cohort_codes, by = c("comparison_cohort" = "cohort_name")) %>%
  mutate(
    cohort_code_pathway = ifelse(
      pathway_support,
      paste0(cohort_code, "\\*"),
      cohort_code
    )
  ) %>%
  select(-c(pathway_support, donor_ID, comparison_cohort, cohort_code)) %>%
  pivot_wider(
    names_from = Sample_ID,
    values_from = cohort_code_pathway,
    values_fn = collapse_fun_no_coma,
    values_fill = ""
  )


# Render the compact code table sorted by gene, with spaces instead of
# underscores in the column headers.
all_outliers_min_abbrev_combined_wide %>%
  arrange(gene) %>%
  rename_with(underscore_to_space) %>%
  kbl() %>%
  kable_styling(bootstrap_options = "bordered", full_width = FALSE)
gene high level cohort TH34 1162 S01 TH34 1149 S02 TH34 1238 S01 TH34 1349 S01 TH34 1349 S02 TH34 1379 S01 TH34 1380 S01 TH34 1150 S02 TH34 1399 S01 TH34 1400 S01 TH34 1412 S01 TH34 1414 S01 TH34 1415 S01 TH34 1444 S01 TH34 1452 S01 TH34 2292 S01 TH34 2351 S01 TH34 2411 S01 TH34 2666 S01 TH34 1163 S01 TH34 1179 S01 TH34 1239 S01 TH34 1350 S01 TH34 1351 S01 TH34 1352 S01 TH34 1381 S01 TH34 1445 S02 TH34 1446 S01 TH34 1447 S01 TH34 1447 S02 TH34 1455 S01 TH34 1456 S02 TH34 2293 S01 TH34 2410 S01
AKT1 TH03_TH34 S* S*
AKT1 Treehouse D*
AKT2 PEDAYA P*
AKT2 TCGA T*
AKT2 TH03_TH34 S*
AKT2 Treehouse C*D*
ALK PEDAYA P
ALK TCGA T
ALK TH03_TH34 S
ALK Treehouse C
BCL6 TCGA T*
BCL6 Treehouse D
BTK TCGA T* T* T*
BTK TH03_TH34 S*
BTK Treehouse C* C* C*
CCND1 TCGA T*
CCND1 TH03_TH34 S*
CCND2 Treehouse D* D
CCND3 TCGA T* T*
CCNE1 Treehouse D*
CDK4 PEDAYA P*
CDK4 TCGA T* T*
CDK4 TH03_TH34 S* S*
CDK4 Treehouse C* C*D*
CDK9 TCGA T* T*
CDK9 TH03_TH34 S
CDK9 Treehouse D C
CSF1R Treehouse D*
DEPTOR TH03_TH34 S*
ETV1 TCGA T T* T T
ETV1 Treehouse C* C*
FGFR1 TCGA T* T* T*
FGFR1 Treehouse C* C*
FGFR2 TCGA T*
FGFR3 PEDAYA P
FGFR3 TCGA T* T
FGFR3 TH03_TH34 S*
FGFR4 PEDAYA P P P* P P
FGFR4 Treehouse C
FLT4 PEDAYA P*
FLT4 TCGA T T* T
FLT4 TH03_TH34 S S*
FLT4 Treehouse CD C* C
GATA2 TCGA T T*
GATA2 Treehouse CD*
HDAC4 PEDAYA P
HDAC4 TCGA T*
HDAC4 TH03_TH34 S*
HDAC4 Treehouse CD*
HDAC7 Treehouse D*
HMOX1 PEDAYA P P*
HMOX1 TCGA T T* T* T* T* T*
HMOX1 Treehouse CD* C*D* C*D C* D* D* C
HSP90B1 TCGA T*
HSP90B1 TH03_TH34 S*
HSP90B1 Treehouse C*D*
IGF1 PEDAYA P* P* P*
IGF1 TCGA T* T*
IGF1 TH03_TH34 S*
IGF1 Treehouse C*D C*
IGF2 TCGA T* T* T* T T* T* T* T* T* T T* T* T* T* T T T* T*
IGF2 Treehouse C* D* C* C* C* C C* C* C C* D* C C C*
IL6 PEDAYA P*
IL6 TCGA T*
IL6 TH03_TH34 S*
IL6 Treehouse C*
JAK1 PEDAYA P
JAK1 TCGA T T
JAK1 TH03_TH34 S S
JAK1 Treehouse C C
KDR TCGA T* T*
KDR TH03_TH34 S*
KDR Treehouse C*
KIT PEDAYA P P P*
KIT TCGA T T T*
KIT TH03_TH34 S S S*
KIT Treehouse C C C*D*
MAP2K2 TCGA T*
MAP2K2 TH03_TH34 S* S*
MAP2K2 Treehouse C*D*
MAP2K4 TCGA T
MAP2K4 TH03_TH34 S
MAP2K4 Treehouse CD*
MDM2 PEDAYA P* P
MDM2 TCGA T* T
MDM2 TH03_TH34 S* S
MDM2 Treehouse C*D* C
MS4A1 PEDAYA P
MS4A1 TCGA T*
MS4A1 TH03_TH34 S*
MS4A1 Treehouse C*
MTOR TCGA T*
MTOR TH03_TH34 S*
NOTCH3 TCGA T*
NOTCH3 TH03_TH34 S*
NOTCH3 Treehouse C*D*
NTRK2 TH03_TH34 S* S* S* S* S* S* S
NTRK2 Treehouse C
NTRK3 TCGA T T* T T
NTRK3 TH03_TH34 S*
NTRK3 Treehouse CD C* C C
PARP1 Treehouse D
PARP2 TCGA T* T*
PARP2 Treehouse C*D*
PDCD1 PEDAYA P
PDCD1 TCGA T
PDCD1 TH03_TH34 S
PDCD1 Treehouse C
PDGFRA TCGA T*
PIK3CD TCGA T* T* T*
PIK3CD TH03_TH34 S* S*
PIK3CD Treehouse C* C*
PIK3R1 TH03_TH34 S*
PIK3R2 TCGA T*
PIK3R2 TH03_TH34 S*
PIK3R2 Treehouse D* C*
PIK3R5 TCGA T* T*
PIK3R5 Treehouse C* C*
PTCH1 TCGA T* T* T*
PTCH1 Treehouse C* C*
RAF1 Treehouse D*
RPTOR TCGA T*
RPTOR TH03_TH34 S*
STAT1 TH03_TH34 S
STAT2 TCGA T
STAT2 TH03_TH34 S
STAT2 Treehouse CD
STAT5A Treehouse D*
TSC2 TCGA T* T* T*
TSC2 TH03_TH34 S* S*
TSC2 Treehouse C*D* C*
VEGFA TCGA T* T* T*
VEGFA TH03_TH34 S* S*
VEGFA Treehouse C* C* D*
VEGFC PEDAYA P*
VEGFC TCGA T*
VEGFC TH03_TH34 S*
VEGFC Treehouse C*D*
WEE1 TCGA T*
WEE1 TH03_TH34 S
WEE1 Treehouse C*D*

Annotate with combined cohort abbreviations

# Wide gene-by-sample table of one-letter cohort codes (comma separated within
# a cell, no pathway-support marker).
all_outliers_abbrev_combined_wide <- outliers %>%
  left_join(cohort_codes, by = c("comparison_cohort" = "cohort_name")) %>%
  select(-c(pathway_support, donor_ID, comparison_cohort)) %>%
  pivot_wider(
    names_from = Sample_ID,
    values_from = cohort_code,
    values_fn = collapse_fun,
    values_fill = ""
  )
# Render the cohort-code table sorted by gene, with spaces instead of
# underscores in the column headers.
all_outliers_abbrev_combined_wide %>%
  arrange(gene) %>%
  rename_with(underscore_to_space) %>%
  kbl() %>%
  kable_styling(bootstrap_options = "bordered", full_width = FALSE)
gene high level cohort TH34 1162 S01 TH34 1149 S02 TH34 1238 S01 TH34 1349 S01 TH34 1349 S02 TH34 1379 S01 TH34 1380 S01 TH34 1150 S02 TH34 1399 S01 TH34 1400 S01 TH34 1412 S01 TH34 1414 S01 TH34 1415 S01 TH34 1444 S01 TH34 1452 S01 TH34 2292 S01 TH34 2351 S01 TH34 2411 S01 TH34 2666 S01 TH34 1163 S01 TH34 1179 S01 TH34 1239 S01 TH34 1350 S01 TH34 1351 S01 TH34 1352 S01 TH34 1381 S01 TH34 1445 S02 TH34 1446 S01 TH34 1447 S01 TH34 1447 S02 TH34 1455 S01 TH34 1456 S02 TH34 2293 S01 TH34 2410 S01
AKT1 TH03_TH34 S S
AKT1 Treehouse D
AKT2 PEDAYA P
AKT2 TCGA T
AKT2 TH03_TH34 S
AKT2 Treehouse C, D
ALK PEDAYA P
ALK TCGA T
ALK TH03_TH34 S
ALK Treehouse C
BCL6 TCGA T
BCL6 Treehouse D
BTK TCGA T T T
BTK TH03_TH34 S
BTK Treehouse C C C
CCND1 TCGA T
CCND1 TH03_TH34 S
CCND2 Treehouse D D
CCND3 TCGA T T
CCNE1 Treehouse D
CDK4 PEDAYA P
CDK4 TCGA T T
CDK4 TH03_TH34 S S
CDK4 Treehouse C C, D
CDK9 TCGA T T
CDK9 TH03_TH34 S
CDK9 Treehouse D C
CSF1R Treehouse D
DEPTOR TH03_TH34 S
ETV1 TCGA T T T T
ETV1 Treehouse C C
FGFR1 TCGA T T T
FGFR1 Treehouse C C
FGFR2 TCGA T
FGFR3 PEDAYA P
FGFR3 TCGA T T
FGFR3 TH03_TH34 S
FGFR4 PEDAYA P P P P P
FGFR4 Treehouse C
FLT4 PEDAYA P
FLT4 TCGA T T T
FLT4 TH03_TH34 S S
FLT4 Treehouse C, D C C
GATA2 TCGA T T
GATA2 Treehouse C, D
HDAC4 PEDAYA P
HDAC4 TCGA T
HDAC4 TH03_TH34 S
HDAC4 Treehouse C, D
HDAC7 Treehouse D
HMOX1 PEDAYA P P
HMOX1 TCGA T T T T T T
HMOX1 Treehouse C, D C, D C, D C D D C
HSP90B1 TCGA T
HSP90B1 TH03_TH34 S
HSP90B1 Treehouse C, D
IGF1 PEDAYA P P P
IGF1 TCGA T T
IGF1 TH03_TH34 S
IGF1 Treehouse C, D C
IGF2 TCGA T T T T T T T T T T T T T T T T T T
IGF2 Treehouse C D C C C C C C C C D C C C
IL6 PEDAYA P
IL6 TCGA T
IL6 TH03_TH34 S
IL6 Treehouse C
JAK1 PEDAYA P
JAK1 TCGA T T
JAK1 TH03_TH34 S S
JAK1 Treehouse C C
KDR TCGA T T
KDR TH03_TH34 S
KDR Treehouse C
KIT PEDAYA P P P
KIT TCGA T T T
KIT TH03_TH34 S S S
KIT Treehouse C C C, D
MAP2K2 TCGA T
MAP2K2 TH03_TH34 S S
MAP2K2 Treehouse C, D
MAP2K4 TCGA T
MAP2K4 TH03_TH34 S
MAP2K4 Treehouse C, D
MDM2 PEDAYA P P
MDM2 TCGA T T
MDM2 TH03_TH34 S S
MDM2 Treehouse C, D C
MS4A1 PEDAYA P
MS4A1 TCGA T
MS4A1 TH03_TH34 S
MS4A1 Treehouse C
MTOR TCGA T
MTOR TH03_TH34 S
NOTCH3 TCGA T
NOTCH3 TH03_TH34 S
NOTCH3 Treehouse C, D
NTRK2 TH03_TH34 S S S S S S S
NTRK2 Treehouse C
NTRK3 TCGA T T T T
NTRK3 TH03_TH34 S
NTRK3 Treehouse C, D C C C
PARP1 Treehouse D
PARP2 TCGA T T
PARP2 Treehouse C, D
PDCD1 PEDAYA P
PDCD1 TCGA T
PDCD1 TH03_TH34 S
PDCD1 Treehouse C
PDGFRA TCGA T
PIK3CD TCGA T T T
PIK3CD TH03_TH34 S S
PIK3CD Treehouse C C
PIK3R1 TH03_TH34 S
PIK3R2 TCGA T
PIK3R2 TH03_TH34 S
PIK3R2 Treehouse D C
PIK3R5 TCGA T T
PIK3R5 Treehouse C C
PTCH1 TCGA T T T
PTCH1 Treehouse C C
RAF1 Treehouse D
RPTOR TCGA T
RPTOR TH03_TH34 S
STAT1 TH03_TH34 S
STAT2 TCGA T
STAT2 TH03_TH34 S
STAT2 Treehouse C, D
STAT5A Treehouse D
TSC2 TCGA T T T
TSC2 TH03_TH34 S S
TSC2 Treehouse C, D C
VEGFA TCGA T T T
VEGFA TH03_TH34 S S
VEGFA Treehouse C C D
VEGFC PEDAYA P
VEGFC TCGA T
VEGFC TH03_TH34 S
VEGFC Treehouse C, D
WEE1 TCGA T
WEE1 TH03_TH34 S
WEE1 Treehouse C, D

SUMMARY TABLE FOR OUTLIERS DETECTED BY TREEHOUSE and OTHER COHORTS (excludes pan-disease)

# Sample/gene pairs called as outliers against the Treehouse_pc cohort.
treehouse_pc_outliers <- outliers %>%
  filter(comparison_cohort == "Treehouse_pc") %>%
  distinct(Sample_ID, gene)

# Every call for those pairs in any cohort except Treehouse_pd.
outliers_detected_by_treehouse_pc <- treehouse_pc_outliers %>%
  left_join(
    filter(outliers, comparison_cohort != "Treehouse_pd"),
    by = c("Sample_ID", "gene")
  )

n_outliers_detected_by_treehouse_pc <- nrow(treehouse_pc_outliers)

# Pairs that have pathway support in at least one cohort.
n_outliers_with_pathway_support_detected_by_treehouse_pc <-
  outliers_detected_by_treehouse_pc %>%
  filter(pathway_support) %>%
  distinct(Sample_ID, gene) %>%
  nrow()

# Totals row appended to the per-cohort summary below.
treehouse_pc_totals_tibble <- tibble(
  high_level_cohort = " Total",
  comparison_cohort = " Total",
  n_outliers_detected = n_outliers_detected_by_treehouse_pc,
  n_outliers_with_pathway_support =
    n_outliers_with_pathway_support_detected_by_treehouse_pc,
  pct_outliers_with_pathway_support =
    100 * n_outliers_with_pathway_support_detected_by_treehouse_pc /
      n_outliers_detected_by_treehouse_pc
)

# Per-cohort counts and percentages, relative to the Treehouse_pc totals.
treehouse_pc_outlier_summary <- outliers_detected_by_treehouse_pc %>%
  group_by(comparison_cohort) %>%
  summarize(
    n_outliers_detected = n(),
    n_outliers_with_pathway_support = sum(pathway_support)
  ) %>%
  ungroup() %>%
  mutate(
    pct_outliers_detected =
      100 * n_outliers_detected / n_outliers_detected_by_treehouse_pc,
    pct_outliers_with_pathway_support_detected =
      100 * n_outliers_with_pathway_support /
        n_outliers_with_pathway_support_detected_by_treehouse_pc,
    pct_outliers_with_pathway_support =
      100 * n_outliers_with_pathway_support / n_outliers_detected
  )

treehouse_pc_outlier_summary_with_totals <- bind_rows(
  treehouse_pc_outlier_summary,
  select(treehouse_pc_totals_tibble, -high_level_cohort)
)

                 
  
# Render the Treehouse_pc summary with all numeric columns rounded to whole
# numbers. The table has six columns; a five-element `digits` vector is
# recycled by kable, so the sixth column was rounded with `digits = NA` and
# printed as NA in every row. A scalar `digits = 0` applies to every numeric
# column (the character cohort column is not rounded).
treehouse_pc_outlier_summary_with_totals %>%
  rename_all(underscore_to_space) %>%
  kbl(digits = 0) %>%
  kable_styling(full_width = FALSE)
comparison cohort n outliers detected n outliers with pathway support pct outliers detected pct outliers with pathway support detected pct outliers with pathway support
PEDAYA 20 10 28 19 NA
TCGA 70 50 97 94 NA
TH03_TH34 37 24 51 45 NA
Treehouse_pc 72 47 100 89 NA
Total 72 53 NA NA NA

SUMMARY TABLE FOR OUTLIERS DETECTED BY TREEHOUSE and OTHER COHORTS

# Sample/gene pairs called as outliers against any Treehouse cohort.
treehouse_outliers <- outliers %>%
  filter(str_detect(high_level_cohort, "Treehouse")) %>%
  distinct(Sample_ID, gene)

# Every call, in any cohort, for those pairs.
outliers_detected_by_treehouse <- treehouse_outliers %>%
  left_join(outliers, by = c("Sample_ID", "gene"))

n_outliers_detected_by_treehouse <- nrow(treehouse_outliers)

# Pairs that have pathway support in at least one cohort.
n_outliers_with_pathway_support_detected_by_treehouse <-
  outliers_detected_by_treehouse %>%
  filter(pathway_support) %>%
  distinct(Sample_ID, gene) %>%
  nrow()

# Totals row appended to the per-cohort summary below.
treehouse_totals_tibble <- tibble(
  high_level_cohort = " Total",
  comparison_cohort = " Total",
  n_outliers_detected = n_outliers_detected_by_treehouse,
  n_outliers_with_pathway_support =
    n_outliers_with_pathway_support_detected_by_treehouse,
  pct_outliers_with_pathway_support =
    100 * n_outliers_with_pathway_support_detected_by_treehouse /
      n_outliers_detected_by_treehouse
)

# Per-cohort counts and percentages, relative to the Treehouse totals.
treehouse_outlier_summary <- outliers_detected_by_treehouse %>%
  group_by(comparison_cohort) %>%
  summarize(
    n_outliers_detected = n(),
    n_outliers_with_pathway_support = sum(pathway_support)
  ) %>%
  ungroup() %>%
  mutate(
    pct_outliers_detected =
      100 * n_outliers_detected / n_outliers_detected_by_treehouse,
    pct_outliers_with_pathway_support_detected =
      100 * n_outliers_with_pathway_support /
        n_outliers_with_pathway_support_detected_by_treehouse,
    pct_outliers_with_pathway_support =
      100 * n_outliers_with_pathway_support / n_outliers_detected
  )

treehouse_outlier_summary_with_totals <- bind_rows(
  treehouse_outlier_summary,
  select(treehouse_totals_tibble, -high_level_cohort)
)

                 
  
# Render the Treehouse summary with all numeric columns rounded to whole
# numbers. The table has six columns; a five-element `digits` vector is
# recycled by kable, so the sixth column was rounded with `digits = NA` and
# printed as NA in every row. A scalar `digits = 0` applies to every numeric
# column (the character cohort column is not rounded).
treehouse_outlier_summary_with_totals %>%
  rename_all(underscore_to_space) %>%
  kbl(digits = 0) %>%
  kable_styling(full_width = FALSE)
comparison cohort n outliers detected n outliers with pathway support pct outliers detected pct outliers with pathway support detected pct outliers with pathway support
PEDAYA 20 10 22 15 NA
TCGA 73 53 82 78 NA
TH03_TH34 38 25 43 37 NA
Treehouse_pc 72 47 81 69 NA
Treehouse_pd 38 29 43 43 NA
Total 89 68 NA NA NA

Summary table for treehouse outliers detected by high level cohorts

# Collapse Treehouse-detected outliers to one row per high-level cohort and
# sample/gene pair (pathway support = supported in any contributing cohort),
# then count pairs per high-level cohort.
high_level_treehouse_outlier_summary <- outliers_detected_by_treehouse %>%
  group_by(high_level_cohort, Sample_ID, gene) %>%
  summarize(pathway_support = any(pathway_support)) %>%
  group_by(high_level_cohort) %>%
  summarize(
    n_outliers_detected = n(),
    n_outliers_with_pathway_support = sum(pathway_support),
    pct_outliers_with_pathway_support =
      100 * n_outliers_with_pathway_support / n_outliers_detected,
    pct_outliers_detected =
      100 * n_outliers_detected / n_outliers_detected_by_treehouse
  )
## `summarise()` has grouped output by 'high_level_cohort', 'Sample_ID'. You can
## override using the `.groups` argument.

Function for renaming cohorts

# Translate internal cohort identifiers in `x` into display names.
# Each fixed identifier is replaced at its first occurrence per string; any
# remaining underscore that is followed by a lowercase letter, "1" or "2" and
# then a lowercase letter is turned into a space.
update_cohort_strings <- function(x) {
  renamed <- str_replace(x, "PEDAYA", "Pediatric")
  renamed <- str_replace(renamed, "TH03_TH34", "Stanford")
  renamed <- str_replace(renamed, "Treehouse_pc", "Treehouse pan-cancer")
  renamed <- str_replace(renamed, "Treehouse_pd", "Treehouse pan-disease")
  str_replace_all(renamed, "_([a-z12][a-z])", " \\1")
}

Combined high and low level tables for treehouse outliers

# Stack the high-level summary and the two low-level Treehouse rows, then put
# the rows in display order: Treehouse, its two sub-cohorts, then the other
# cohorts.
#
# Rows are ordered by cohort name rather than by positional index vectors, so
# the result stays correct if the row order of either input summary changes.
# A cohort not listed below is placed last.
treehouse_high_low <- bind_rows(
  high_level_treehouse_outlier_summary %>%
    rename(comparison_cohort = high_level_cohort),
  treehouse_outlier_summary %>%
    filter(str_detect(comparison_cohort, "Treehouse"))
  # ,
  # treehouse_totals_tibble %>%
  #   select(-high_level_cohort)
) %>%
  arrange(match(
    comparison_cohort,
    c("Treehouse", "Treehouse_pc", "Treehouse_pd",
      "TH03_TH34", "TCGA", "PEDAYA")
  ))

# Format the combined table for display: each count is shown as
# "n/denominator (pct%)", a totals row is appended, and cohort identifiers are
# translated to display names.
treehouse_high_low_outlier_summary <- treehouse_high_low %>%
  mutate(
    pct_outliers_with_pathway_support_detected =
      100 * n_outliers_with_pathway_support /
        n_outliers_with_pathway_support_detected_by_treehouse,
    `Druggable outliers detected` = paste0(
      n_outliers_detected, "/", n_outliers_detected_by_treehouse,
      " (", round(pct_outliers_detected), "%)"
    ),
    `Druggable outliers with pathway support` = paste0(
      n_outliers_with_pathway_support, "/",
      n_outliers_with_pathway_support_detected_by_treehouse,
      " (", round(pct_outliers_with_pathway_support_detected), "%)"
    ),
    `Fraction of druggable outliers with pathway support` = paste0(
      n_outliers_with_pathway_support, "/", n_outliers_detected,
      " (", round(100 * n_outliers_with_pathway_support / n_outliers_detected),
      "%)"
    )
  ) %>%
  select(
    comparison_cohort,
    `Druggable outliers detected`,
    `Druggable outliers with pathway support`,
    `Fraction of druggable outliers with pathway support`
  ) %>%
  bind_rows(
    # Totals row: plain counts plus the overall supported fraction.
    treehouse_totals_tibble %>%
      mutate(
        `Fraction of druggable outliers with pathway support` = paste0(
          n_outliers_with_pathway_support, "/", n_outliers_detected,
          " (",
          round(100 * n_outliers_with_pathway_support / n_outliers_detected),
          "%)"
        ),
        `Druggable outliers detected` = as.character(n_outliers_detected),
        `Druggable outliers with pathway support` =
          as.character(n_outliers_with_pathway_support)
      ) %>%
      select(
        comparison_cohort,
        `Druggable outliers detected`,
        `Druggable outliers with pathway support`,
        `Fraction of druggable outliers with pathway support`
      )
  ) %>%
  mutate(comparison_cohort = update_cohort_strings(comparison_cohort))



# Render the formatted table; rows 2 and 3 (the Treehouse sub-cohorts) are
# indented under the Treehouse row.
treehouse_high_low_outlier_summary %>%
  rename_with(underscore_to_space_initial_cap) %>%
  kbl() %>%
  kable_styling(full_width = FALSE) %>%
  add_indent(c(2, 3), level_of_indent = 1)
Comparison cohort Druggable outliers detected Druggable outliers with pathway support Fraction of druggable outliers with pathway support
Treehouse 89/89 (100%) 64/68 (94%) 64/89 (72%)
Treehouse pan-cancer 72/89 (81%) 47/68 (69%) 47/72 (65%)
Treehouse pan-disease 38/89 (43%) 29/68 (43%) 29/38 (76%)
Stanford 38/89 (43%) 25/68 (37%) 25/38 (66%)
TCGA 73/89 (82%) 53/68 (78%) 53/73 (73%)
Pediatric 20/89 (22%) 10/68 (15%) 10/20 (50%)
Total 89 68 68/89 (76%)
# Write the formatted table to a time-stamped TSV in ../results.
table_output_name <- paste0(
  "../results/treehouse_outliers_detected_relative_to_different_cohorts_",
  format(Sys.time(), "%Y_%m_%d-%H_%M_%S_%Y"),
  ".tsv"
)

treehouse_high_low_outlier_summary %>%
  rename_all(underscore_to_space_initial_cap) %>%
  write_tsv(table_output_name)

this output was saved as ../results/treehouse_outliers_detected_relative_to_different_cohorts_2024_02_16-16_12_39_2024.tsv

SUMMARY TABLE FOR OUTLIERS DETECTED BY ANY COHORT, INCLUDES HIGH AND LOW LEVEL COHORTS

# Number of distinct sample/gene outliers called in any cohort.
n_outliers_detected_by_any_method <- outliers %>%
  distinct(Sample_ID, gene) %>%
  nrow()

# Number of those that have pathway support in at least one cohort.
n_outliers_with_pathway_support_detected_by_any_method <- outliers %>%
  filter(pathway_support) %>%
  distinct(Sample_ID, gene) %>%
  nrow()

# Totals row appended to the summaries below.
totals_tibble <- tibble(
  high_level_cohort = " Total",
  comparison_cohort = " Total",
  n_outliers_detected = n_outliers_detected_by_any_method,
  n_outliers_with_pathway_support =
    n_outliers_with_pathway_support_detected_by_any_method,
  pct_outliers_with_pathway_support =
    100 * n_outliers_with_pathway_support_detected_by_any_method /
      n_outliers_detected_by_any_method
)

# Per-cohort counts and percentages, relative to the all-cohort totals.
outlier_summary <- outliers %>%
  group_by(comparison_cohort) %>%
  summarize(
    n_outliers_detected = n(),
    n_outliers_with_pathway_support = sum(pathway_support)
  ) %>%
  ungroup() %>%
  mutate(
    pct_outliers_detected =
      100 * n_outliers_detected / n_outliers_detected_by_any_method,
    pct_outliers_with_pathway_support_detected =
      100 * n_outliers_with_pathway_support /
        n_outliers_with_pathway_support_detected_by_any_method,
    pct_outliers_with_pathway_support =
      100 * n_outliers_with_pathway_support / n_outliers_detected
  )

outlier_summary_with_totals <- bind_rows(
  outlier_summary,
  select(totals_tibble, -high_level_cohort)
)

# Render the all-cohort summary with all numeric columns rounded to whole
# numbers. The table has six columns; a five-element `digits` vector is
# recycled by kable, so the sixth column was rounded with `digits = NA` and
# printed as NA in every row. A scalar `digits = 0` applies to every numeric
# column (the character cohort column is not rounded).
outlier_summary_with_totals %>%
  rename_all(underscore_to_space) %>%
  kbl(digits = 0) %>%
  kable_styling(full_width = FALSE)
comparison cohort n outliers detected n outliers with pathway support pct outliers detected pct outliers with pathway support detected pct outliers with pathway support
PEDAYA 26 12 20 12 NA
TCGA 98 74 75 73 NA
TH03_TH34 53 39 41 39 NA
Treehouse_pc 72 47 55 47 NA
Treehouse_pd 38 29 29 29 NA
Total 130 101 NA NA NA

Summary table for all outliers and high level cohorts

# Collapse all outliers to one row per high-level cohort and sample/gene pair
# (pathway support = supported in any contributing cohort), then count pairs
# per high-level cohort.
high_level_outlier_summary <- outliers %>%
  group_by(high_level_cohort, Sample_ID, gene) %>%
  summarize(pathway_support = any(pathway_support)) %>%
  group_by(high_level_cohort) %>%
  summarize(
    n_outliers_detected = n(),
    n_outliers_with_pathway_support = sum(pathway_support),
    pct_outliers_with_pathway_support =
      100 * n_outliers_with_pathway_support / n_outliers_detected,
    pct_outliers_detected =
      100 * n_outliers_detected / n_outliers_detected_by_any_method
  )
## `summarise()` has grouped output by 'high_level_cohort', 'Sample_ID'. You can
## override using the `.groups` argument.
# High-level cohorts in descending name order, followed by the totals row.
high_level_outlier_summary_with_totals <- bind_rows(
  arrange(high_level_outlier_summary, desc(high_level_cohort)),
  select(totals_tibble, -comparison_cohort)
)



# Render the high-level summary (five columns, one `digits` entry per column).
high_level_outlier_summary_with_totals %>%
  rename_with(underscore_to_space) %>%
  kbl(
    format.args = list(big.mark = ","),
    digits = c(NA, 0, 0, 0, 0)
  ) %>%
  kable_styling(full_width = FALSE)
high level cohort n outliers detected n outliers with pathway support pct outliers with pathway support pct outliers detected
Treehouse 89 64 72 68
TH03_TH34 53 39 74 41
TCGA 98 74 76 75
PEDAYA 26 12 46 20
Total 130 101 78 NA

Combined high and low level tables

# Stack the high-level summary (with its totals row) and the two low-level
# Treehouse rows, then put the rows in display order: Treehouse, its two
# sub-cohorts, the other cohorts, and the totals row last.
#
# Rows are ordered by cohort name rather than by positional index vectors, so
# the result stays correct if the row order of either input summary changes.
# A cohort not listed below is placed last.
high_low <- bind_rows(
  high_level_outlier_summary_with_totals %>%
    rename(comparison_cohort = high_level_cohort),
  outlier_summary_with_totals %>%
    filter(str_detect(comparison_cohort, "Treehouse"))
) %>%
  arrange(match(
    comparison_cohort,
    c("Treehouse", "Treehouse_pc", "Treehouse_pd",
      "TH03_TH34", "TCGA", "PEDAYA", " Total")
  ))
       

# Format the combined table for display: the totals row is dropped and
# re-added at the end as plain counts, the first underscore in each cohort
# name becomes a space, and each count is shown as "n/denominator (pct%)".
high_low_outlier_summary <- high_low %>%
  filter(!str_detect(comparison_cohort, "Total")) %>%
  mutate(
    comparison_cohort = str_replace(comparison_cohort, "_", " "),
    pct_outliers_with_pathway_support_detected =
      100 * n_outliers_with_pathway_support /
        n_outliers_with_pathway_support_detected_by_any_method,
    `Druggable outliers detected` = paste0(
      n_outliers_detected, "/", n_outliers_detected_by_any_method,
      " (", round(pct_outliers_detected), "%)"
    ),
    `Druggable outliers with pathway support` = paste0(
      n_outliers_with_pathway_support, "/",
      n_outliers_with_pathway_support_detected_by_any_method,
      " (", round(pct_outliers_with_pathway_support_detected), "%)"
    ),
    `Fraction of druggable outliers with pathway support` = paste0(
      n_outliers_with_pathway_support, "/", n_outliers_detected,
      " (", round(100 * n_outliers_with_pathway_support / n_outliers_detected),
      "%)"
    )
  ) %>%
  select(
    comparison_cohort,
    `Druggable outliers detected`,
    `Druggable outliers with pathway support`,
    `Fraction of druggable outliers with pathway support`
  ) %>%
  bind_rows(
    # Totals row: plain counts plus the overall supported fraction.
    totals_tibble %>%
      mutate(
        `Fraction of druggable outliers with pathway support` = paste0(
          n_outliers_with_pathway_support, "/", n_outliers_detected,
          " (",
          round(100 * n_outliers_with_pathway_support / n_outliers_detected),
          "%)"
        ),
        `Druggable outliers detected` = as.character(n_outliers_detected),
        `Druggable outliers with pathway support` =
          as.character(n_outliers_with_pathway_support)
      ) %>%
      select(
        comparison_cohort,
        `Druggable outliers detected`,
        `Druggable outliers with pathway support`,
        `Fraction of druggable outliers with pathway support`
      )
  )

         


  

# Render the formatted table; rows 2 and 3 (the Treehouse sub-cohorts) are
# indented under the Treehouse row.
high_low_outlier_summary %>%
  rename_with(underscore_to_space_initial_cap) %>%
  kbl() %>%
  kable_styling(full_width = FALSE) %>%
  add_indent(c(2, 3), level_of_indent = 1)
Comparison cohort Druggable outliers detected Druggable outliers with pathway support Fraction of druggable outliers with pathway support
Treehouse 89/130 (68%) 64/101 (63%) 64/89 (72%)
Treehouse pc 72/130 (55%) 47/101 (47%) 47/72 (65%)
Treehouse pd 38/130 (29%) 29/101 (29%) 29/38 (76%)
TH03 TH34 53/130 (41%) 39/101 (39%) 39/53 (74%)
TCGA 98/130 (75%) 74/101 (73%) 74/98 (76%)
PEDAYA 26/130 (20%) 12/101 (12%) 12/26 (46%)
Total 130 101 101/130 (78%)

Patient level summary table for all outliers

# For each donor, flag whether any outlier was called in each cohort (and in
# either Treehouse cohort), then count the donors flagged per cohort.
outliers %>%
  group_by(donor_ID) %>%
  summarize(
    any_PEDAYA = "PEDAYA" %in% comparison_cohort,
    any_TH03_TH34 = "TH03_TH34" %in% comparison_cohort,
    any_TCGA = "TCGA" %in% comparison_cohort,
    any_Treehouse_pc = "Treehouse_pc" %in% comparison_cohort,
    any_Treehouse_pd = "Treehouse_pd" %in% comparison_cohort,
    any_Treehouse = any_Treehouse_pc | any_Treehouse_pd
  ) %>%
  pivot_longer(cols = starts_with("any")) %>%
  mutate(name = str_remove(name, "any_")) %>%
  group_by(name) %>%
  summarize(n_patients_with_druggable_outliers = sum(value)) %>%
  kbl() %>%
  kable_styling(full_width = FALSE)
name n_patients_with_druggable_outliers
PEDAYA 18
TCGA 31
TH03_TH34 22
Treehouse 31
Treehouse_pc 30
Treehouse_pd 18

Sample level summary table for all outliers

# For each sample, flag whether any outlier was called in each cohort (and in
# either Treehouse cohort), then count the samples flagged per cohort.
outliers %>%
  group_by(Sample_ID) %>%
  summarize(
    any_PEDAYA = "PEDAYA" %in% comparison_cohort,
    any_TH03_TH34 = "TH03_TH34" %in% comparison_cohort,
    any_TCGA = "TCGA" %in% comparison_cohort,
    any_Treehouse_pc = "Treehouse_pc" %in% comparison_cohort,
    any_Treehouse_pd = "Treehouse_pd" %in% comparison_cohort,
    any_Treehouse = any_Treehouse_pc | any_Treehouse_pd
  ) %>%
  pivot_longer(cols = starts_with("any")) %>%
  mutate(name = str_remove(name, "any_")) %>%
  group_by(name) %>%
  summarize(n_samples_with_druggable_outliers = sum(value)) %>%
  kbl() %>%
  kable_styling(full_width = FALSE)
name n_samples_with_druggable_outliers
PEDAYA 19
TCGA 33
TH03_TH34 23
Treehouse 33
Treehouse_pc 32
Treehouse_pd 18

“The genes represent 56 of the 115 druggable genes we consider (eTable)”

# Full list of druggable genes considered (columns: gene, group).
all_druggable_genes <- read_tsv(
  file = "../input_data/treehouseDruggableGenes_2020-03_25.txt"
)
## Rows: 115 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): gene, group
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tabulate how many druggable genes appear as an outlier against a Treehouse
# cohort in at least one sample.
all_druggable_genes %>%
  mutate(
    found_as_treehouse_outlier =
      gene %in% pull(filter(outliers, high_level_cohort == "Treehouse"), gene)
  ) %>%
  tabyl(found_as_treehouse_outlier)
##  found_as_treehouse_outlier  n percent
##                       FALSE 69     0.6
##                        TRUE 46     0.4

REPEAT ANALYSIS USING ONLY OUTLIERS WITH PATHWAY SUPPORT

Tile plot of outliers with pathway support

# Tile plot restricted to outliers with pathway support: one facet per
# sample, one tile per gene/cohort pair, coloured by comparison cohort.
outliers %>%
  filter(pathway_support) %>%
  ggplot() +
  geom_tile(
    aes(x = comparison_cohort, y = gene, fill = comparison_cohort)
  ) +
  facet_wrap(~Sample_ID, nrow = 1) +
  theme(
    # x tick labels are hidden; the cohort is also mapped to the fill colour.
    # Alternative kept for reference:
    # axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
    axis.text.x = element_blank(),
    strip.text.x = element_text(angle = 90)
  ) +
  xlab("") +
  scale_fill_bright()

Heatmap showing the number of cohorts in which each outlier was detected

I can make this look better if we decide to use it, but it’s non-trivial

# For each sample/gene pair, count the rows (one per comparison cohort call)
# among outliers with pathway support. The result stays grouped by Sample_ID,
# which is what the summarise() message reports.
pathway_outliers_heatmap_data <-
  filter(outliers, pathway_support) %>%
  group_by(Sample_ID, gene) %>%
  summarise(n_outliers = n())
## `summarise()` has grouped output by 'Sample_ID'. You can override using the
## `.groups` argument.
# Heatmap of pathway-supported outliers: samples on x, genes on y, fill is
# the per-sample/gene count computed in pathway_outliers_heatmap_data.
pathway_outliers_heatmap_data %>%
  ggplot() +
  geom_tile(
    aes(x = Sample_ID, y = gene, fill = n_outliers),
    color = "black"
  ) +
  # theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Build the Venn diagram input: one character vector of "<Sample_ID>_<gene>"
# keys per comparison cohort, restricted to outliers with pathway support.
raw_pathway_support_outliers_for_venn <- outliers %>%
  filter(pathway_support)  %>%
  mutate(sample_gene = paste(Sample_ID, gene, sep = "_")) %>%
  arrange(comparison_cohort) %>%
  select(sample_gene, comparison_cohort) %>%
  group_split(comparison_cohort)

# Keys for each cohort, in the order produced by group_split()
list_of_pathway_support_outliers_for_venn <- lapply(
  raw_pathway_support_outliers_for_venn,
  function(x) pull(x, sample_gene)
)

# group_split() returns an unnamed list. Take each set's name from the cohort
# column of the very piece its keys came from, so names and contents cannot
# get out of step (previously the names came from a separate arrange() +
# distinct() pipeline that only matched the split by position).
names(list_of_pathway_support_outliers_for_venn) <- vapply(
  raw_pathway_support_outliers_for_venn,
  function(x) x$comparison_cohort[[1]],
  character(1)
)

# Venn diagram of pathway-supported outliers per cohort, with the members of
# each intersection made available (show_intersect = TRUE)
list_of_pathway_support_outliers_for_venn %>%
  ggVennDiagram(show_intersect = TRUE)
## Warning in geom_text(aes_string(label = "count", text = "text"), x =
## label_coord[, : Ignoring unknown aesthetics: text
# Same Venn diagram as a static plot, filled with the "Reds" palette
list_of_pathway_support_outliers_for_venn %>%
  ggVennDiagram() +
  scale_fill_distiller(palette = "Reds", direction = 1)

Annotate with combined full cohort names

# Wide form: one row per gene / high-level cohort and one column per sample.
# Each cell holds the comparison cohorts for that combination, combined by
# collapse_fun (defined elsewhere in this file).
outliers_with_pathway_support_combined_wide <- outliers %>%
  filter(pathway_support) %>%
  select(-c(pathway_support, donor_ID)) %>%
  pivot_wider(
    names_from = Sample_ID,
    values_from = comparison_cohort,
    values_fn = collapse_fun
  )

# Back to long form with the combined cohort string per sample; na.omit()
# drops the sample/gene combinations that pivot_wider() left empty.
outliers_with_pathway_support_combined_long <-
  outliers_with_pathway_support_combined_wide %>%
  pivot_longer(
    cols = -c(gene, high_level_cohort),
    names_to = "Sample_ID",
    values_to = "comparison_cohorts"
  ) %>%
  na.omit()

How many outliers with pathway support are present in each combination of cohorts?

# Count the rows of outliers_with_pathway_support_combined_long by the
# combination of cohorts that detected them, most frequent first, with
# formatted percentages and a totals row.
tabyl(outliers_with_pathway_support_combined_long,
      comparison_cohorts) %>%
  arrange(desc(n)) %>%
  adorn_pct_formatting() %>%
  adorn_totals() %>%
  kbl() %>%
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned
  kable_styling(full_width = FALSE)
comparison_cohorts n percent
TCGA 74 39.2%
TH03_TH34 39 20.6%
Treehouse_pc 35 18.5%
Treehouse_pd 17 9.0%
PEDAYA 12 6.3%
Treehouse_pc, Treehouse_pd 12 6.3%
Total 189
# Tile plot of pathway-supported outliers: samples on x, genes on y, fill is
# the combination of cohorts in which the outlier was detected.
outliers_with_pathway_support_combined_long %>%
  ggplot() +
  geom_tile(
    aes(x = Sample_ID, y = gene, fill = comparison_cohorts)
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Number of distinct samples with at least one pathway-supported outlier
outliers_with_pathway_support_combined_long %>%
  pull(Sample_ID) %>%
  n_distinct()
## [1] 32

Patient level summary table for outliers with pathway support

# Patient-level summary for outliers with pathway support: for each
# comparison cohort, count the donors that have at least one such outlier
# relative to that cohort. "Treehouse" is TRUE when either Treehouse_pc or
# Treehouse_pd flagged the donor.
outliers %>%
  filter(pathway_support) %>%
  group_by(donor_ID) %>%
  summarize(any_PEDAYA = "PEDAYA" %in% comparison_cohort,
            any_TH03_TH34 = "TH03_TH34" %in% comparison_cohort,
            any_TCGA = "TCGA" %in% comparison_cohort,
            any_Treehouse_pc = "Treehouse_pc" %in% comparison_cohort,
            any_Treehouse_pd = "Treehouse_pd" %in% comparison_cohort,
            any_Treehouse = any_Treehouse_pc | any_Treehouse_pd) %>%
  # One row per donor/cohort flag, then strip the "any_" prefix so the
  # table shows plain cohort names
  pivot_longer(starts_with("any")) %>%
  mutate(name = str_remove(name, "any_")) %>%
  group_by(name) %>%
  # Summing the logical flags counts the donors with at least one outlier
  summarize(n_patients_with_druggable_outliers = sum(value)) %>%
  kbl() %>%
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned
  kable_styling(full_width = FALSE)
name n_patients_with_druggable_outliers
PEDAYA 8
TCGA 26
TH03_TH34 16
Treehouse 26
Treehouse_pc 22
Treehouse_pd 13

Annotate with combined cohort abbreviations

# Wide table of pathway-supported outliers using the one-letter cohort codes
# from cohort_codes: one row per gene / high-level cohort, one column per
# sample. Codes sharing a cell are combined by collapse_fun (defined
# elsewhere in this file); cells with no outlier are filled with "".
outliers_with_pathway_support_abbrev_combined_wide <- outliers %>%
  filter(pathway_support) %>%
  left_join(cohort_codes, by = c(comparison_cohort = "cohort_name")) %>%
  select(-c(pathway_support, donor_ID, comparison_cohort)) %>%
  pivot_wider(
    names_from = Sample_ID,
    values_from = cohort_code,
    values_fn = collapse_fun,
    values_fill = ""
  )

Big table of outliers with pathway support

# Render the abbreviated wide table sorted by gene, with underscores in the
# column names replaced by spaces for display.
outliers_with_pathway_support_abbrev_combined_wide %>%
  arrange(gene) %>%
  # rename_with() replaces the superseded rename_all(); by default it
  # applies the function to every column name
  rename_with(underscore_to_space) %>%
  kbl() %>%
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned
  kable_styling(full_width = FALSE,
                bootstrap_options = "bordered")
gene high level cohort TH34 1149 S02 TH34 1238 S01 TH34 1399 S01 TH34 1400 S01 TH34 1412 S01 TH34 1415 S01 TH34 1444 S01 TH34 1452 S01 TH34 2292 S01 TH34 2411 S01 TH34 1179 S01 TH34 1239 S01 TH34 1349 S01 TH34 1349 S02 TH34 1350 S01 TH34 1352 S01 TH34 1379 S01 TH34 1380 S01 TH34 1381 S01 TH34 1150 S02 TH34 1414 S01 TH34 1445 S02 TH34 1447 S01 TH34 1447 S02 TH34 1455 S01 TH34 1456 S02 TH34 2293 S01 TH34 2351 S01 TH34 2410 S01 TH34 1446 S01 TH34 1162 S01 TH34 1351 S01
AKT1 TH03_TH34 S S
AKT1 Treehouse D
AKT2 PEDAYA P
AKT2 TCGA T
AKT2 TH03_TH34 S
AKT2 Treehouse C, D
BCL6 TCGA T
BTK TCGA T T T
BTK TH03_TH34 S
BTK Treehouse C C C
CCND1 TCGA T
CCND1 TH03_TH34 S
CCND2 Treehouse D
CCND3 TCGA T T
CCNE1 Treehouse D
CDK4 PEDAYA P
CDK4 TCGA T T
CDK4 TH03_TH34 S S
CDK4 Treehouse C, D C
CDK9 TCGA T T
CSF1R Treehouse D
DEPTOR TH03_TH34 S
ETV1 TCGA T
ETV1 Treehouse C C
FGFR1 TCGA T T T
FGFR1 Treehouse C C
FGFR2 TCGA T
FGFR3 TCGA T
FGFR3 TH03_TH34 S
FGFR4 PEDAYA P
FLT4 PEDAYA P
FLT4 TCGA T
FLT4 TH03_TH34 S
FLT4 Treehouse C
GATA2 TCGA T
GATA2 Treehouse D
HDAC4 TCGA T
HDAC4 TH03_TH34 S
HDAC4 Treehouse D
HDAC7 Treehouse D
HMOX1 PEDAYA P
HMOX1 TCGA T T T T T
HMOX1 Treehouse C C C, D D D D
HSP90B1 TCGA T
HSP90B1 TH03_TH34 S
HSP90B1 Treehouse C, D
IGF1 PEDAYA P P P
IGF1 TCGA T T
IGF1 TH03_TH34 S
IGF1 Treehouse C C
IGF2 TCGA T T T T T T T T T T T T T T
IGF2 Treehouse C C C C C D C D C C
IL6 PEDAYA P
IL6 TCGA T
IL6 TH03_TH34 S
IL6 Treehouse C
KDR TCGA T T
KDR TH03_TH34 S
KDR Treehouse C
KIT PEDAYA P
KIT TCGA T
KIT TH03_TH34 S
KIT Treehouse C, D
MAP2K2 TCGA T
MAP2K2 TH03_TH34 S S
MAP2K2 Treehouse C, D
MAP2K4 Treehouse D
MDM2 PEDAYA P
MDM2 TCGA T
MDM2 TH03_TH34 S
MDM2 Treehouse C, D
MS4A1 TCGA T
MS4A1 TH03_TH34 S
MS4A1 Treehouse C
MTOR TCGA T
MTOR TH03_TH34 S
NOTCH3 TCGA T
NOTCH3 TH03_TH34 S
NOTCH3 Treehouse C, D
NTRK2 TH03_TH34 S S S S S S
NTRK3 TCGA T
NTRK3 TH03_TH34 S
NTRK3 Treehouse C
PARP2 TCGA T T
PARP2 Treehouse C, D
PDGFRA TCGA T
PIK3CD TCGA T T T
PIK3CD TH03_TH34 S S
PIK3CD Treehouse C C
PIK3R1 TH03_TH34 S
PIK3R2 TCGA T
PIK3R2 TH03_TH34 S
PIK3R2 Treehouse D C
PIK3R5 TCGA T T
PIK3R5 Treehouse C C
PTCH1 TCGA T T T
PTCH1 Treehouse C C
RAF1 Treehouse D
RPTOR TCGA T
RPTOR TH03_TH34 S
STAT5A Treehouse D
TSC2 TCGA T T T
TSC2 TH03_TH34 S S
TSC2 Treehouse C, D C
VEGFA TCGA T T T
VEGFA TH03_TH34 S S
VEGFA Treehouse C C D
VEGFC PEDAYA P
VEGFC TCGA T
VEGFC TH03_TH34 S
VEGFC Treehouse C, D
WEE1 TCGA T
WEE1 Treehouse C, D

Detail table for all samples

# Timestamped output path for the per-sample detail table
table_output_name <- paste0(
  "../results/detailed_treehouse_outliers_detected_relative_to_different_cohorts_",
  format(Sys.time(), "%Y_%m_%d-%H_%M_%S_%Y"),
  ".tsv"
)

# Write every outlier (with and without pathway support) with the cohort
# names rewritten by update_cohort_strings (defined elsewhere in this file),
# sorted by sample, gene and rewritten cohort name.
outliers %>%
  mutate(comparison_cohort = update_cohort_strings(comparison_cohort)) %>%
  arrange(Sample_ID, gene, comparison_cohort) %>%
  select(
    Dataset = Sample_ID,
    `Outlier gene` = gene,
    `Comparison cohort` = comparison_cohort,
    `Pathway support` = pathway_support
  ) %>%
  write_tsv(table_output_name)

This output was saved as ../results/detailed_treehouse_outliers_detected_relative_to_different_cohorts_2024_02_16-16_12_43_2024.tsv

# Record the R version, platform and package versions used for this run
utils::sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggVennDiagram_1.2.2 cowplot_1.1.1       gridExtra_2.3      
##  [4] kableExtra_1.3.4    khroma_1.10.0       janitor_2.1.0      
##  [7] forcats_0.5.2       stringr_1.5.0       dplyr_1.0.10       
## [10] purrr_0.3.5         readr_2.1.3         tidyr_1.2.1        
## [13] tibble_3.2.1        ggplot2_3.4.4       tidyverse_1.3.2    
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.6.3            sf_1.0-9            lubridate_1.9.0    
##  [4] bit64_4.0.5         RColorBrewer_1.1-3  webshot_0.5.4      
##  [7] httr_1.4.4          tools_4.2.1         backports_1.4.1    
## [10] bslib_0.5.0         utf8_1.2.3          R6_2.5.1           
## [13] KernSmooth_2.23-20  lazyeval_0.2.2      DBI_1.1.3          
## [16] colorspace_2.1-0    withr_2.5.0         tidyselect_1.2.0   
## [19] bit_4.0.5           compiler_4.2.1      cli_3.6.1          
## [22] rvest_1.0.3         xml2_1.3.3          plotly_4.10.1      
## [25] labeling_0.4.2      sass_0.4.7          scales_1.2.1       
## [28] classInt_0.4-9      proxy_0.4-27        systemfonts_1.0.4  
## [31] digest_0.6.33       yulab.utils_0.0.6   rmarkdown_2.23     
## [34] svglite_2.1.0       pkgconfig_2.0.3     htmltools_0.5.5    
## [37] dbplyr_2.2.1        fastmap_1.1.1       highr_0.10         
## [40] htmlwidgets_1.6.2   rlang_1.1.1         readxl_1.4.1       
## [43] rstudioapi_0.14     jquerylib_0.1.4     farver_2.1.1       
## [46] generics_0.1.3      jsonlite_1.8.7      crosstalk_1.2.0    
## [49] vroom_1.6.0         googlesheets4_1.0.1 magrittr_2.0.3     
## [52] Rcpp_1.0.11         munsell_0.5.0       fansi_1.0.4        
## [55] lifecycle_1.0.3     stringi_1.7.12      yaml_2.3.7         
## [58] snakecase_0.11.0    grid_4.2.1          parallel_4.2.1     
## [61] crayon_1.5.2        haven_2.5.1         hms_1.1.2          
## [64] knitr_1.43          pillar_1.9.0        reprex_2.0.2       
## [67] glue_1.6.2          evaluate_0.21       data.table_1.14.6  
## [70] modelr_0.1.10       vctrs_0.6.3         tzdb_0.3.0         
## [73] cellranger_1.1.0    gtable_0.3.3        assertthat_0.2.1   
## [76] cachem_1.0.8        xfun_0.39           broom_1.0.1        
## [79] e1071_1.7-13        class_7.3-20        googledrive_2.0.0  
## [82] RVenn_1.1.0         viridisLite_0.4.2   gargle_1.2.1       
## [85] units_0.8-1         timechange_0.1.1    ellipsis_0.3.2